This is an analysis for the impact of introducing the lunch program in 1955 the growth trend of the Japanese Boys
The data source is Ministry of Internal Affairs and Communications Statistics Bureau.
www.e-stat.go.jp/SG1/estat/List.do?bid=000001014499&
NOTE: each column represent an age group
data <- read_xlsx("FEH_00400002_221209195404.xlsx")
data[data == "***"] <- NA
datatable(data)
Changing the data to long format
data <- data %>% select(year, y17) %>% rename(height = y17) %>% mutate(height = as.numeric(height)) %>% mutate(year = year - 1900)
datatable(data)
max_height <- 175
data$height_scaled <- data$height / max_height
data$logit_height <- log(data$height_scaled / (1 - data$height_scaled))
model_all <- lm(logit_height ~ year, data = data)
tbl_regression1 <- tbl_regression(model_all)
data$predicted_all <- predict(model_all, newdata = data)
data$predicted_all <- exp(data$predicted_all)/(1+exp(data$predicted_all))*max_height
datatable(data)
ggplot(data = data, aes(x = year, y = height, colour = "Oberved Heights")) +
scale_color_manual(labels = c("Observed Heights", "Predicted Data"), values = c("blue", "red")) + geom_line() + geom_line(aes(y = predicted_all, colour = "Predicted Data"))
war_year <- 1940 - 1900
end_war_year <- 1948 - 1900
data_pre_war <- data %>% filter(year <= war_year)
fit_pre_war <- lm(logit_height ~ year, data = data_pre_war)
tbl_regression2 <- tbl_regression(fit_pre_war)
data$predicted_pre_war <- predict(fit_pre_war, newdata = data)
data$predicted_pre_war <- exp(data$predicted_pre_war)/(1+exp(data$predicted_pre_war))*max_height
datatable(data)
ggplot(data = data, aes(x = year, y = height, colour = "Oberved Heights")) +
scale_color_manual(labels = c("Observed Heights", "Predicted Data Full Model", "Pre War Model"), values = c("blue", "red", "black")) + geom_line() + geom_line(aes(y = predicted_all, colour = "Predicted Data")) + geom_line(aes(y = predicted_pre_war, colour = "Pre War Projection"))
data_post_war <- data %>% filter(year >= end_war_year)
fit_post_war <- lm(logit_height ~ I(year^2) + year, data = data_post_war)
tbl_regression3 <- tbl_regression(fit_post_war)
data$predicted_post_war <- predict(fit_post_war, newdata = data)
data$predicted_post_war <- exp(data$predicted_post_war)/(1+exp(data$predicted_post_war))*max_height
datatable(data)
data_post_war <- data %>% filter(year >= end_war_year)
fit_post_war_direct <- lm(logit_height ~ I(year^2), data = data_post_war)
tbl_regression4 <- tbl_regression(fit_post_war_direct)
data$predicted_post_war_direct <- predict(fit_post_war_direct, newdata = data)
data$predicted_post_war_direct <- exp(data$predicted_post_war_direct)/(1+exp(data$predicted_post_war_direct))*max_height
datatable(data)
tbl_merge(
tbls = list(tbl_regression1, tbl_regression2, tbl_regression3, tbl_regression4),
tab_spanner = c("**Full Data**", "**Pre War**", "Post War Full", "Post War Squared Only")
)
| Characteristic | Full Data | Pre War | Post War Full | Post War Squared Only | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Beta | 95% CI1 | p-value | Beta | 95% CI1 | p-value | Beta | 95% CI1 | p-value | Beta | 95% CI1 | p-value | |
| year | 0.02 | 0.01, 0.02 | <0.001 | 0.01 | 0.01, 0.01 | <0.001 | 0.09 | 0.08, 0.09 | <0.001 | |||
| I(year^2) | 0.00 | 0.00, 0.00 | <0.001 | 0.00 | 0.00, 0.00 | <0.001 | ||||||
| 1 CI = Confidence Interval | ||||||||||||
ggplot(data = data, aes(x = year, y = height, colour = "Oberved Heights")) +
scale_color_manual(labels = c("Observed Heights", "Direct Fit", "Poly"), values = c("blue", "red", "black")) +
geom_line() +
geom_line(aes(y = predicted_post_war_direct, colour = "Direct Fit")) +
geom_line(aes(y = predicted_post_war, colour = "Poly"))
data <- data %>% filter(year >= 40)
full_plot <-ggplot(data = data, aes(x = year, y = height, colour = "Oberved Heights")) +
scale_color_manual(labels = c("Observed Heights", "Predicted Data Full Model", "Pre War Model", "Post War Model Poly", "Post War Direct"), values = c("blue", "red", "black", "green", "grey")) +
geom_line() +
geom_line(aes(y = predicted_all, colour = "Predicted Data")) +
geom_line(aes(y = predicted_pre_war, colour = "Pre War Projection")) +
geom_line(aes(y = predicted_post_war, colour = "Post War Model Poly")) +
geom_line(aes(y = predicted_post_war, colour = "Post War Direct"))
ggplotly(full_plot)
data <- read_xlsx("FEH_00400002_221209195404.xlsx")
data[data == "***"] <- NA
datatable(data)
height_list <- list()
for(i in 3:14) {
pre_col_name <- colnames(data)[i]
rdata <- data %>% select(year, pre_col_name) %>% rename(height = pre_col_name) %>%
mutate(height = as.numeric(height)) %>% mutate(year = year - 1900)
# Transformation
max_height <- 175
rdata$height <- as.numeric(rdata$height)
rdata$height_scaled <- rdata$height / max_height
rdata$logit_height <- log(rdata$height_scaled / (1 - rdata$height_scaled))
# setting war parameters
war_year <- 1940 - 1900
end_war_year <- 1948 - 1900
# Pre-war-Model
rdata_pre_war <- na.omit(rdata %>% filter(year <= war_year))
rfit_pre_war <- lm(logit_height ~ year, data = rdata_pre_war)
rdata$predicted_pre_war <- predict(rfit_pre_war, newdata = rdata)
rdata$predicted_pre_war <- exp(rdata$predicted_pre_war)/(1+exp(rdata$predicted_pre_war))*max_height
# Post_war_model
data_post_war <- rdata %>% filter(year >= end_war_year)
fit_post_war <- lm(logit_height ~ I(year^2) + year, data = data_post_war)
rdata$predicted_post_war <- predict(fit_post_war, newdata = rdata)
rdata$predicted_post_war <- exp(rdata$predicted_post_war)/(1+exp(rdata$predicted_post_war))*max_height
# assign to list
rdata <- rdata %>% filter(year >= 40)
height_list[[i-2]] <- rdata
}
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(pre_col_name)
##
## # Now:
## data %>% select(all_of(pre_col_name))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
# Add a new column to each data frame that represents the number of the data frame in the list
for (i in 1:length(height_list)) {
height_list[[i]]$df_num <- i
}
df_combined <- do.call(rbind, height_list)
factors_labels <- vector()
for(i in unique(df_combined$df_num)) {
factors_labels <- c(factors_labels, paste("Age Group", i + 5) )
}
# Exchanging the number by the age group
df_combined$df_num <- factor(df_combined$df_num, levels = unique(df_combined$df_num),
labels = factors_labels
)
# adding 1900 to the year
df_combined$year <- df_combined$year + 1900
plot_list <- list()
for (i in 1:length(height_list)) {
df <- na.omit(height_list[[i]])
p <- ggplot(data = height_list[[i]], aes(x = year, y = height, colour = "Oberved Heights")) +
scale_color_manual(labels = c("Observed Heights", "Pre War", "Post War"), values = c("blue", "red", "black")) +
geom_line() +
geom_line(aes(y = predicted_pre_war, colour = "Pre War")) +
geom_line(aes(y = predicted_post_war, colour = "Post War"))
plot_list[[i]] <- p
i <- i + 1
}
lapply(seq_along(plot_list), function(i) {
ggsave(plot_list[[i]], file = paste(i, ".png", sep = ""))
})
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## [[1]]
## [1] "1.png"
##
## [[2]]
## [1] "2.png"
##
## [[3]]
## [1] "3.png"
##
## [[4]]
## [1] "4.png"
##
## [[5]]
## [1] "5.png"
##
## [[6]]
## [1] "6.png"
##
## [[7]]
## [1] "7.png"
##
## [[8]]
## [1] "8.png"
##
## [[9]]
## [1] "9.png"
##
## [[10]]
## [1] "10.png"
##
## [[11]]
## [1] "11.png"
##
## [[12]]
## [1] "12.png"
ggplot(df_combined, aes(x = year, y = height, color = df_num)) +
geom_line(aes(group = df_num)) +
scale_x_continuous(limits = c(min(df_combined$year), max(df_combined$year))) +
scale_y_continuous(limits = c(min(df_combined$height), max(df_combined$height))) +
labs(x = "Year", y = "Height") +
scale_color_discrete(name = "Age Group") +
theme(legend.title = element_text(size = 12),
legend.text = element_text(size = 10))
height_list <- list()
for(i in 3:14) {
pre_col_name <- colnames(data)[i]
rdata <- data %>% select(year, pre_col_name) %>% rename(height = pre_col_name) %>%
mutate(height = as.numeric(height)) %>% mutate(year = year - 1900)
# Transformation
max_height <- 175
rdata$height <- as.numeric(rdata$height)
rdata$height_scaled <- rdata$height / max_height
rdata$logit_height <- log(rdata$height_scaled / (1 - rdata$height_scaled))
# setting war parameters
war_year <- 1940 - 1900
end_war_year <- 1948 - 1900
# Pre-war-Model
rdata_pre_war <- na.omit(rdata %>% filter(year <= war_year))
rfit_pre_war <- lm(logit_height ~ year, data = rdata_pre_war)
rdata$predicted_pre_war <- predict(rfit_pre_war, newdata = rdata)
rdata$predicted_pre_war <- exp(rdata$predicted_pre_war)/(1+exp(rdata$predicted_pre_war))*max_height
# Post_war_model
data_post_war <- rdata %>% filter(year >= end_war_year)
fit_post_war <- lm(logit_height ~ I(year^2) + year, data = data_post_war)
rdata$predicted_post_war <- predict(fit_post_war, newdata = rdata)
rdata$predicted_post_war <- exp(rdata$predicted_post_war)/(1+exp(rdata$predicted_post_war))*max_height
# assign to list
height_list[[i-2]] <- rdata
}
meeting_point <- data.frame(age = 6:17, meeting_point = NA)
for(i in 1:length(height_list)){
df <- height_list[[i]]
min_index <- which.min(abs(df$predicted_pre_war - df$predicted_post_war))
meeting_point[i,2] <- df[min_index, ]$year
}
library("ggthemes")
ggplot(data = meeting_point, aes(x = age, y = meeting_point)) +
geom_point() +
theme_economist() +
labs(x = "Age", y = "Meeting Point (Year)")
data <- read_xlsx("FEH_00400002_221209195404.xlsx")
data <- arrange(data, year)
data[data == "***"] <- NA
new_data <- data[, -c(1,2)]
new_data <- as.matrix(new_data)
diag_list <- list()
for(i in 1:nrow(new_data)) {
diag_list[[i]] <- diag(new_data[i:nrow(new_data),])
}
df_list <- list()
age_vec <- 6:17
for (i in 1:length(diag_list)) {
df_list[[i]] <- data.frame(age=age_vec[1:length(diag_list[[i]])], height = diag_list[[i]])
}
ylim <- range(unlist(lapply(df_list, function(x) x$height)))
saveGIF({
for (i in 1:length(df_list)) {
plot(df_list[[i]]$age, df_list[[i]]$height, type="l", xlab="Age", ylab="Height",
ylim=c(floor(as.numeric(ylim[1])), ceiling(as.numeric(ylim[2]))), main=paste("Year:", 1900 + i),
col=i, lwd=2, las=1, cex.main=1.5, cex.lab=1.5)
abline(h=mean(df_list[[i]]$height), lty=2, col="gray")
}
}, interval=0.5, movie.name="growth_curves.gif")
img <- image_read("growth_curves.gif")
image_animate(img)